{# --- session identity and container images --- #}
{# `session` is the prefix of every Kubernetes object name rendered by this template. #}
{% set session = "tr1616" %}
{# `image` runs the league manager, the model pools and the actors; `learner_image` runs the learners and TensorBoard. #}
{% set image = "ccr.ccs.tencentyun.com/sc2ai/tleague-scii4100:20200416153354" %}
{% set learner_image = "ccr.ccs.tencentyun.com/sc2ai/tleague-gpu-hvd-scii4100:20200416153354" %}
{# Name used for the imagePullSecrets entry; an empty string leaves the imagePullSecrets sections out. #}
{% set docker_registry_credential = "tke-dockreg-cred" %}
{# When true, the containers declare explicit resource requests/limits. #}
{% set require_resources = true %}
{# --- shared persistent volume --- #}
{% set pvc_name = "pvc-share-full" %}
{# Sub-directories of the PVC: checkpoints (mounted by the league manager) and the replay dataset (mounted by the actors). #}
{% set chkpoints_zoo_pvc_sub_dir = "chkpoints_zoo/" %}
{% set replay_ds_pvc_sub_dir = "replay_ds/" %}
{% set chkpoints_pvc_sub_dir = chkpoints_zoo_pvc_sub_dir ~ session ~ "_chkpoints" %}
{# In-container mount point of the replay dataset. #}
{% set replay_ds_local_root = "/root/replay_ds/" %}
{# common #}
{# Environment and policy identifiers passed as --env / --policy to both the learner and the actor commands. #}
{% set env = "sc2full_formal7_dict" %}
{% set policy = "tpolicies.net_zoo.mnet_v5.mnet_v5d5" %}
{# Rendered verbatim (as a Python-style dict literal) into the --policy_config flag of learners and actors. #}
{# NOTE(review): 'n_v' below and the standalone `n_v` variable in the learner section are both 7 — presumably they must agree; confirm before changing either. #}
{% set policy_config = {
  'use_xla': True,
  'test': False,
  'use_loss_type': 'rl',
  'use_value_head': True,
  'use_self_fed_heads': False,
  'use_lstm': True,
  'nlstm': 256,
  'hs_len': 256*2,
  'lstm_duration': 1,
  'lstm_dropout_rate': 0.0,
  'lstm_cell_type': 'lstm',
  'lstm_layer_norm': True,
  'weight_decay': 0.00002,
  'arg_scope_type': 'type_b',
  'endpoints_verbosity': 10,
  'n_v': 7,
  'distillation': True,
  'fix_all_embed': False,
  'use_base_mask': True,
  'zstat_embed_version': 'v3',
  'sync_statistics': 'horovod',
  'temperature': 0.8,
  'merge_pi': False,
} %}
{# unroll_length is passed to learners and actors; rollout_length is passed to learners only. #}
{% set unroll_length = 256 %}
{% set rollout_length = 16 %}
{# model pool #}
{# Number of model-pool Service/Pod pairs; they are named <session>-mp0 ... <session>-mp<n-1>. #}
{% set n_model_pools = 2 %}
{# Every model pool exposes the same two ports; clients receive "<name>:<port1>:<port2>" triples via --model_pool_addrs. #}
{% set model_pool_port1 = 10003 %}
{% set model_pool_port2 = 10004 %}
{# Forwarded as --verbose to tleague.scripts.run_model_pool. #}
{% set model_pool_verbose = 0 %}
{# league mgr #}
{# Port exposed by the league-manager Service/Pod and passed to it as --port. #}
{% set league_mgr_port = 20005 %}
{# Game-manager class name and its configuration; both are rendered verbatim into league-manager flags. #}
{% set game_mgr_type = "AEMatchMakingGameMgrV2" %}
{# NOTE(review): 'lrn_id_list' uses the same "lrngrp<i>" ids that the learners receive via --learner_id — presumably one entry per learner group (n_lrn_groups); confirm when changing the group count. #}
{% set game_mgr_config = {
  'lrn_id_list': ['lrngrp0', 'lrngrp1', 'lrngrp2'],
  'main_agent_pfsp_prob': 0.7,
  'main_agent_forgotten_prob': 0.2,
  'main_agent_forgotten_me_winrate_thre': 0.5,
  'main_agent_forgotten_ma_winrate_thre': 0.7,
  'main_exploiter_min_lps': 3,
  'main_exploiter_max_lps': 4,
  'main_exploiter_prob_thre': 0.15,
  'cyclic_exploiter_n_leaves': 4,
  'cyclic_exploiter_root_name': 'None:init_model',
  'cyclic_exploiter_prob_thre': 0.15
 } %}
{# Hyperparameter-manager class name and its configuration (passed as --mutable_hyperparam_type / --hyperparam_config_name). #}
{% set mutable_hyperparam_type = "DiscreteDistribHyperparamV2" %}
{# The 'lrn_id_to_*' entries are keyed by learner id. #}
{# NOTE(review): each reward-weight list has 7 entries — presumably one per value head (n_v = 7); confirm. #}
{% set hyperparam_config_name = {
  'learning_rate': 0.00001,
  'lam': 0.99,
  'gamma': 1.0,
  'burn_in_timesteps': 9000000,
  'distill_model_key': 'None:init_model',
  'lrn_id_to_n_distrib': {'lrngrp0': 8, 'lrngrp1': 8, 'lrngrp2': 8},
  'lrn_id_to_max_n_cyclic': {'lrngrp0': None, 'lrngrp1': 8, 'lrngrp2': 4},
  'lrn_id_to_distrib_type': {
    'lrngrp0': 'uniform',
    'lrngrp1': 'cyclic_one_hot',
    'lrngrp2': 'cyclic_one_hot'
    },
  'lrn_id_to_zeroing_prob': {'lrngrp0': 0.1, 'lrngrp1': 0.1, 'lrngrp2': 0.0},
  'lrn_id_to_reward_weights': {
    'lrngrp0': [1.0, 0.8, 0.8, 0.8, 0.8, 0.8, 0.5],
    'lrngrp1': [1.0, 0.8, 0.8, 0.8, 0.8, 0.8, 0.5],
    'lrngrp2': [1.0, 0.8, 0.8, 0.8, 0.8, 0.8, 0.5],
  },
  'lrn_id_to_reward_weights_activate_prob': {
    'lrngrp0': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
    'lrngrp1': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
    'lrngrp2': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
  }
} %}
{# Passed as --restore_checkpoint_dir; empty here. #}
{% set league_mgr_restore_checkpoint_dir = "" %}
{# In-container path where the checkpoint sub-directory of the PVC is mounted in the league-manager pod. #}
{% set league_mgr_chkpoints_dir = "/root/results/" %}
{# Passed as --save_checkpoint_root: a per-session directory below the mount point above. #}
{% set league_mgr_save_checkpoint_root = league_mgr_chkpoints_dir + session + "_chkpoints" %}
{# Passed as --save_interval_secs; 21600 s = 6 h. #}
{% set league_mgr_save_interval_secs = 21600 %}
{# Selects between the --mute_actor_msg and --nomute_actor_msg flags. #}
{% set mute_actor_msg = true %}
{# Passed as --pseudo_learner_num; the meaning of -1 is defined by run_league_mgr and is not visible in this file. #}
{% set pseudo_learner_num = -1 %}
{# Passed as --init_model_path; the path lies below the league manager's checkpoint mount point (/root/results/). #}
{% set init_model_path = "/root/results/tr1613-prev_chkpoints/init_model:0001_20200413210144.model" %}
{% set league_mgr_verbose = 0 %}
{# learners #}
{# Topology: n_lrn_groups learner groups, each spanning n_hosts_per_lrn_group pods that request n_gpus_per_host GPUs. #}
{# Host 0 of a group starts the horovod job with -np = n_hosts_per_lrn_group * n_gpus_per_host. #}
{% set n_lrn_groups = 3 %}
{% set n_hosts_per_lrn_group = 2 %}
{% set n_gpus_per_host = 8 %}
{# Port given to tleague_horovodrun via -p and used by the sshd started on the non-zero hosts. #}
{% set hvd_ssh_port = 9527 %}
{# Local rank k of a host is assigned the port pair lrn_port_base + 2*k and lrn_port_base + 2*k + 1. #}
{% set lrn_port_base = 30003 %}
{# The following values are forwarded one-to-one to learner flags (--batch_size, --rm_size, --pub_interval, --log_interval, --burn_in_timesteps). #}
{% set batch_size = 64 %}
{% set lrn_rm_size = 32000 %}
{% set lrn_pub_interval = 200 %}
{% set lrn_log_interval = 100 %}
{% set lrn_burn_in_timesteps = 0 %}
{# Passed as --n_v to the actors only; the learners do not receive this variable as a flag. #}
{% set n_v = 7 %}
{# Selects between --rwd_shape and --norwd_shape for the learners. #}
{% set lrn_rwd_shape = true %}
{# TensorBoard port; a falsy value removes the TensorBoard container and its ports from the manifest. #}
{% set lrn_tb_port = 9003 %}
{# One entry per learner group, indexed by the group number and rendered into --learner_config. #}
{# NOTE(review): 'vf_coef' has 7 entries (same count as n_v) while 'distill_coef' and 'ent_coef' have 11 — the meaning of the 11 slots is not visible in this file. #}
{% set lrngrp_learner_config = [
  {'vf_coef': [5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
   'max_grad_norm': 1.0,
   'distill_coef': [0.15, 0.005, 0.002, 0.002, 0.002, 0.004, 0.0067, 0.003, 0.02, 0.0033, 0.005],
   'ent_coef': [0.00001, 0.00005, 0.00002, 0.00002, 0.00002, 0.00004, 0.000067, 0.00003, 0.0002, 0.000033, 0.00005]},
  {'vf_coef': [5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
   'max_grad_norm': 1.0,
   'distill_coef': [0.15, 0.005, 0.002, 0.002, 0.002, 0.004, 0.0067, 0.003, 0.02, 0.0033, 0.005],
   'ent_coef': [0.00001, 0.00005, 0.00002, 0.00002, 0.00002, 0.00004, 0.000067, 0.00003, 0.0002, 0.000033, 0.00005]},
  {'vf_coef': [5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
   'max_grad_norm': 1.0,
   'distill_coef': [0.15, 0.005, 0.002, 0.002, 0.002, 0.004, 0.0067, 0.003, 0.02, 0.0033, 0.005],
   'ent_coef': [0.00001, 0.00005, 0.00002, 0.00002, 0.00002, 0.00004, 0.000067, 0.00003, 0.0002, 0.000033, 0.00005]}
]%}
{# One entry per learner group, indexed by the group number and rendered into --total_timesteps. #}
{% set lrngrp_total_timesteps = [
    200000000,
    200000000,
    100000000
] %}
{# actors per learner #}
{# Replica count of each actor Deployment; one Deployment is rendered per (group, host, local rank). #}
{% set n_actors_per_learner = 20 %}
{# Selects between --distillation and --nodistillation for the actors. #}
{% set actor_distillation = true %}
{% set actor_update_model_freq = 320 %}
{# Selects between --rwd_shape and --norwd_shape for the actors. #}
{% set actor_rwd_shape = false %}
{% set actor_log_interval_steps = 51 %}
{% set actor_verbose = 11 %}
{% set actor_replay_dir = "/root/replays/" %}
{# One entry per learner group, indexed by the group number and rendered into the actors' --interface_config. #}
{# The 'zstat_data_src' paths lie below /root/replay_ds/, the actors' mount point for the replay dataset. #}
{% set lrngrp_interface_config = [
  {'zstat_data_src': '/root/replay_ds/rp1522-mmr-ge6000-winner-selected-8',
   'mmr': 7000,
   'max_bo_count': 50,
   'max_bobt_count': 20,
   'raw_selection': True,
   'zstat_presort_order_name': '',
   'zmaker_version': 'v5'},
  {'zstat_data_src': '/root/replay_ds/rp1522-mmr-ge6000-winner-selected-8',
   'mmr': 7000,
   'max_bo_count': 50,
   'max_bobt_count': 20,
   'raw_selection': True,
   'zstat_presort_order_name': 'general_a',
   'zmaker_version': 'v5'},
  {'zstat_data_src': '/root/replay_ds/rp1522-mmr-ge6000-winner-selected-8',
   'mmr': 7000,
   'max_bo_count': 50,
   'max_bobt_count': 20,
   'raw_selection': True,
   'zstat_presort_order_name': 'cyclic_b',
   'zmaker_version': 'v5'}
]%}
{# One entry per learner group, indexed by the group number and rendered into the actors' --env_config. #}
{% set lrngrp_env_config = [
  {'use_trt': True,
   'astar_rwd_version': 'v4'},
  {'use_trt': False,
   'astar_rwd_version': 'v4'},
  {'use_trt': False,
   'astar_rwd_version': 'v4'},
] %}

{# --- league manager --- #}
{# Flip the literal below to `false` to leave the league manager out of the rendered manifest. #}
{% if true %}
---
{# Service: makes the league manager reachable as <session>-league-mgr:<league_mgr_port>. #}
apiVersion: v1
kind: Service
metadata:
  name: {{ session }}-league-mgr
  labels:
    job: league-mgr
    type: league-mgr
    session: {{ session }}
spec:
  ports:
  - name: port1
    port: {{ league_mgr_port }}
  selector:
    job: league-mgr
    session: {{ session }}
---
{# Pod: runs tleague.scripts.run_league_mgr with the checkpoint sub-directory of the PVC mounted. #}
kind: Pod
apiVersion: v1
metadata:
  name: {{ session }}-league-mgr
  labels:
    job: league-mgr
    type: league-mgr
    session: {{ session }}
spec:
  restartPolicy: Never  # if failure, let it die
  nodeSelector:
    type: network
{% if docker_registry_credential %}
  imagePullSecrets:
  - name: {{ docker_registry_credential }}
{% endif %}
  volumes:
  - name: data-dir
    persistentVolumeClaim:
      claimName: {{ pvc_name }}
  containers:
  - name: {{ session }}-league-mgr-container
    image: {{ image }}
    ports:
    - containerPort: {{ league_mgr_port }}
{% if require_resources %}
    resources:
      requests:
        cpu: 60
        memory: 200Gi
        nvidia.com/gpu: 0
      limits:
        nvidia.com/gpu: 0
{% endif %}
    volumeMounts:
    - name: data-dir
      mountPath: {{ league_mgr_chkpoints_dir }}
      subPath: {{ chkpoints_zoo_pvc_sub_dir }}
    command: ["python"]
    args:
    - "-m"
    - "tleague.scripts.run_league_mgr"
{# comma-separated "<name>:<port1>:<port2>" triple for every model pool #}
    - "--model_pool_addrs={% for mp in range(n_model_pools) %}{% if not loop.first %},{% endif %}{{ session }}-mp{{ mp }}:{{ model_pool_port1 }}:{{ model_pool_port2 }}{% endfor %}"
    - "--port={{ league_mgr_port }}"
    - "--game_mgr_type={{ game_mgr_type }}"
    - "--game_mgr_config={{ game_mgr_config }}"
    - "--mutable_hyperparam_type={{ mutable_hyperparam_type }}"
    - "--hyperparam_config_name={{ hyperparam_config_name }}"
    - "--restore_checkpoint_dir={{ league_mgr_restore_checkpoint_dir }}"
    - "--save_checkpoint_root={{ league_mgr_save_checkpoint_root }}"
    - "--save_interval_secs={{ league_mgr_save_interval_secs }}"
    - "--{{ 'mute_actor_msg' if mute_actor_msg else 'nomute_actor_msg' }}"
    - "--verbose={{ league_mgr_verbose }}"
    - "--pseudo_learner_num={{ pseudo_learner_num }}"
    - "--init_model_path={{ init_model_path }}"
{% endif %}
{# --- model pools --- #}
{# Flip the literal below to `false` to leave the model pools out of the rendered manifest. #}
{% if true %}
{% for mp in range(n_model_pools) %}
{# Per-pool object name and `job` label; the Service selects its Pod through session + job. #}
{% set mp_name = session ~ "-mp" ~ mp %}
{% set mp_job = "model-pool-" ~ mp %}
---
apiVersion: v1
kind: Service
metadata:
  name: {{ mp_name }}
  labels:
    type: model-pool
    job: {{ mp_job }}
    session: {{ session }}
spec:
  ports:
  - name: port1
    port: {{ model_pool_port1 }}
  - name: port2
    port: {{ model_pool_port2 }}
  selector:
    job: {{ mp_job }}
    session: {{ session }}
---
kind: Pod
apiVersion: v1
metadata:
  name: {{ mp_name }}
  labels:
    type: model-pool
    job: {{ mp_job }}
    session: {{ session }}
spec:
  restartPolicy: Never  # if failure, let it die
  nodeSelector:
    type: network
{% if docker_registry_credential %}
  imagePullSecrets:
  - name: {{ docker_registry_credential }}
{% endif %}
  containers:
  - name: {{ session }}-model-pool-container
    image: {{ image }}
    ports:
    - containerPort: {{ model_pool_port1 }}
    - containerPort: {{ model_pool_port2 }}
{% if require_resources %}
    resources:
      requests:
        cpu: 30
        memory: 100Gi
        nvidia.com/gpu: 0
      limits:
        nvidia.com/gpu: 0
{% endif %}
    command: ["python"]
    args:
    - "-m"
    - "tleague.scripts.run_model_pool"
    - "--ports={{ model_pool_port1 }}:{{ model_pool_port2 }}"
    - "--verbose={{ model_pool_verbose }}"
{% endfor %}
{% endif %}
{# --- learners and actors per learner --- #}
{% if true %}
{% for i in range(n_lrn_groups) %}
{% for j in range(n_hosts_per_lrn_group - 1, -1, -1) %}
{# --- one Service per learner host, so that every host owns a DNS name (<session>-lg<i>-h<j>) --- #}
---
apiVersion: v1
kind: Service
metadata:
  name: {{ session }}-lg{{ i }}-h{{ j }}
  labels:
    type: learner
    session: {{ session }}
spec:
  ports:
  - name: port-ssh
    port: {{ hvd_ssh_port }}
{# two consecutive ports per local GPU rank: offsets (0, 1), (2, 3), ... from lrn_port_base #}
{% for even_off in range(0, 2 * n_gpus_per_host, 2) %}
  - name: port{{ even_off }}
    port: {{ lrn_port_base + even_off }}
  - name: port{{ even_off + 1 }}
    port: {{ lrn_port_base + even_off + 1 }}
{% endfor %}
{% if lrn_tb_port %}
  - name: port-tb
    port: {{ lrn_tb_port }}
{% endif %}
  selector:
    host: host-{{ j }}
    group: group-{{ i }}
    type: learner
    session: {{ session }}
{# Learner pod for group i, host j. Host 0 launches the horovod job for the whole group and carries the
   TensorBoard sidecar; every other host only runs an sshd on hvd_ssh_port and then sleeps. #}
---
apiVersion: v1
kind: Pod
metadata:
  name: {{ session }}-lg{{ i }}-h{{ j }}
  labels:
    session: {{ session }}
    type: learner
    group: group-{{ i }}
    host: host-{{ j }}
spec:
  nodeSelector:
    type: gpu
  restartPolicy: Never  # if failure, let it die
  volumes:
{# emptyDir shared between the learner container and the TensorBoard container of host 0 #}
  - name: training-log-dir
    emptyDir: {}
{% if docker_registry_credential %}
  imagePullSecrets:
  - name: {{ docker_registry_credential }}
{% endif %}
  containers:
    - name: {{ session }}-lg{{ i }}-h{{ j }}-container
      image: {{ learner_image }}
      ports:
      - containerPort: {{ hvd_ssh_port }}
{# one port pair per local GPU rank, matching the ports of this host's Service #}
{% for k in range(n_gpus_per_host) %}
      - containerPort: {{ lrn_port_base + 2*k }}
      - containerPort: {{ lrn_port_base + 2*k + 1}}
{% endfor %}
{% if lrn_tb_port %}
      - containerPort: {{ lrn_tb_port }}
{% endif %}
{% if require_resources %}
      resources:
        limits:
          nvidia.com/gpu: {{ n_gpus_per_host }}
        requests:
          nvidia.com/gpu: {{ n_gpus_per_host }}
          cpu: 68
          memory: 260Gi
{% endif %}
      env:
{# NOTE(review): the variable is named NONCCL_DEBUG, not NCCL_DEBUG — presumably a deliberate way to keep NCCL debug output off; confirm. #}
      - name: NONCCL_DEBUG
        value: "INFO"
{% if j == 0 %}
{# --- run the mpirun/horovodrun command --- #}
      volumeMounts:
      - name: training-log-dir
        mountPath: /root/work/training_log
      command:
      - "tleague_horovodrun"
      args:
      - "--verbose"
      - "--exclude-env-vars-pattern"
      - "{{ session }}"
      - "--start-timeout"
      - "1800"
      - "-p"
      - "{{ hvd_ssh_port }}"
      - "-np"
      - "{{ n_hosts_per_lrn_group * n_gpus_per_host }}"
      - "-H"
{# comma-separated "<host service name>:<n_gpus_per_host>" for every host of this group #}
{% set sep = joiner(',') %}
      - "{% for jj in range(n_hosts_per_lrn_group) %}{{ sep() }}{{ session }}-lg{{ i }}-h{{ jj }}:{{ n_gpus_per_host }}{% endfor %}"
      - "python"
      - "-m"
      - "tleague.scripts.run_hvd_ppo_learner2"
      - "--league_mgr_addr={{ session }}-league-mgr:{{ league_mgr_port }}"
{# NOTE(review): the inline loop below reuses the name `i`. Jinja2 for-loops have their own scope, so `i` is the group index again afterwards, but a distinct name would be clearer. #}
{% set sep = joiner(',') %}
      - "--model_pool_addrs={% for i in range(n_model_pools) %}{{ sep() }}{{ session }}-mp{{ i }}:{{ model_pool_port1 }}:{{ model_pool_port2 }}{% endfor %}"
{# NOTE(review): this loop emits one separate --learner_spec flag per host, each listing the same n_gpus_per_host "gpu_id:port:port" triples. Whether the learner script expects a repeated flag or one combined list is not visible here — confirm. #}
{% for ind_host in range(n_hosts_per_lrn_group) %}
{% set sep = joiner(',') %}
      - "--learner_spec={% for gpu_id in range(n_gpus_per_host) %}{{ sep() }}{{ gpu_id }}:{{ lrn_port_base + 2*gpu_id }}:{{ lrn_port_base + 2*gpu_id + 1 }}{% endfor %}"
{% endfor %}
      - "--learner_id=lrngrp{{ i }}"
      - "--unroll_length={{ unroll_length }}"
      - "--rollout_length={{ rollout_length }}"
      - "--batch_size={{ batch_size }}"
      - "--rm_size={{ lrn_rm_size }}"
      - "--pub_interval={{ lrn_pub_interval }}"
      - "--log_interval={{ lrn_log_interval }}"
      - "--total_timesteps={{ lrngrp_total_timesteps[i] }}"
      - "--burn_in_timesteps={{ lrn_burn_in_timesteps }}"
      - "--env={{ env }}"
      - "--policy={{ policy }}"
      - "--policy_config={{ policy_config }}"
      - "--{% if lrn_rwd_shape %}rwd_shape{% else %}norwd_shape{% endif %}"
{# batch_worker_num is hard-coded to 4 here rather than taken from a template variable #}
      - "--batch_worker_num={{ 4 }}"
      - "--learner_config={{ lrngrp_learner_config[i] }}"
{% else %}
{# --- start an ssh daemon and run an arbitrary command that occupies the container --- #}
{# the sleep lasts 3600 * 24 * 7 * 52 * 3 seconds, i.e. three 52-week years #}
      command:
      - "bash"
      - "-c"
      args:
      - "/usr/sbin/sshd -p {{ hvd_ssh_port }}; sleep {{ 3600 * 24 * 7 * 52 * 3}}"
{% endif %}
{% if j==0 and lrn_tb_port %}
{# --- start tensorboard when applicable --- #}
{# The sidecar mounts the same emptyDir as the learner container, but at /root/training_log instead of /root/work/training_log. #}
    - name: {{ session }}-tb-lrngrp{{ i }}rank0-container
      image: {{ learner_image }}
      ports:
      - containerPort: {{ lrn_tb_port }}
      volumeMounts:
      - name: training-log-dir
        mountPath: /root/training_log
      env:
{# empty CUDA_VISIBLE_DEVICES: no GPU is made visible to the TensorBoard process #}
      - name: CUDA_VISIBLE_DEVICES
        value: ""
      command:
      - "tensorboard"
      args:
      - "--logdir=/root/training_log/lrngrp{{ i }}rank0"
      - "--port={{ lrn_tb_port }}"
{% endif %}
{# --- end of the host-0-only sections (learner command and TensorBoard sidecar) --- #}
{% for k in range(n_gpus_per_host) %}
{# --- the actors corresponding to group i, host j, local rank k --- #}
{# One Deployment per learner process. All of its replicas are pointed (via --learner_addr) at the port
   pair assigned to local rank k on host j: lrn_port_base + 2*k and lrn_port_base + 2*k + 1. #}
---
kind: Deployment
{# apps/v1 instead of extensions/v1beta1: the latter no longer serves Deployments as of Kubernetes 1.16. #}
{# apps/v1 requires spec.selector, which is declared below and matches the pod template labels. #}
apiVersion: apps/v1
metadata:
  name: {{ session }}-actor-lg{{ i }}-h{{ j }}-localrank{{ k }}
  labels:
    session: {{ session }}
    type: actor
spec:
  replicas: {{ n_actors_per_learner }}
  selector:
    matchLabels:
      session: {{ session }}
      type: actor
      group: group-{{ i }}
      host: host-{{ j }}
      localrank: localrank-{{ k }}
  template:
    metadata:
      labels:
        session: {{ session }}
        type: actor
        group: group-{{ i }}
        host: host-{{ j }}
        localrank: localrank-{{ k }}
    spec:
      nodeSelector:
        type: cpu
      volumes:
      - name: data-dir
        persistentVolumeClaim:
          claimName: {{ pvc_name }}
{# same truthiness check as every other pod in this template (also skips an undefined/None credential) #}
{% if docker_registry_credential %}
      imagePullSecrets:
      - name: {{ docker_registry_credential }}
{% endif %}
      containers:
      - name: {{ session }}-actor-lg{{ i }}-h{{ j }}-localrank{{ k }}-container
        image: {{ image }}
        imagePullPolicy: IfNotPresent
{% if require_resources %}
        resources:
          limits:
            nvidia.com/gpu: 0
          requests:
            nvidia.com/gpu: 0
            cpu: 3500m
            memory: 6Gi
{% endif %}
{# The replay-dataset mount must not depend on require_resources: the volume above is always declared
   and the interface config reads its zstat data from below this mount point. #}
        volumeMounts:
        - name: data-dir
          mountPath: {{ replay_ds_local_root }}
          subPath: {{ replay_ds_pvc_sub_dir }}
        command:
        - "python"
        args:
        - "-m"
        - "tleague.scripts.run_ppo_actor"
        - "--league_mgr_addr={{ session }}-league-mgr:{{ league_mgr_port }}"
{# the inline loop uses its own variable name so that it cannot be confused with the group index i #}
{% set sep = joiner(',') %}
        - "--model_pool_addrs={% for mp in range(n_model_pools) %}{{ sep() }}{{ session }}-mp{{ mp }}:{{ model_pool_port1 }}:{{ model_pool_port2 }}{% endfor %}"
        - "--learner_addr={{ session }}-lg{{ i }}-h{{ j }}:{{ lrn_port_base + 2*k }}:{{ lrn_port_base + 2*k + 1 }}"
        - "--unroll_length={{ unroll_length }}"
        - "--update_model_freq={{ actor_update_model_freq }}"
        - "--env={{ env }}"
        - "--env_config={{ lrngrp_env_config[i] }}"
        - "--policy={{ policy }}"
        - "--policy_config={{ policy_config }}"
        - "--verbose={{ actor_verbose }}"
        - "--log_interval_steps={{ actor_log_interval_steps }}"
        - "--n_v={{ n_v }}"
        - "--{% if actor_rwd_shape %}rwd_shape{% else %}norwd_shape{% endif %}"
        - "--{% if actor_distillation %}distillation{% else %}nodistillation{% endif %}"
        - "--interface_config={{ lrngrp_interface_config[i] }}"
        - "--replay_dir={{ actor_replay_dir }}"
{% endfor %}
{# --- endfor k --- #}
{% endfor %}
{# --- endfor j --- #}
{% endfor %}
{# --- endfor i --- #}
{% endif %}
{# --- endif true/false --- #}